This section is obtained from Google SunRoof Project
Solar panels
Solar panels are made up of photovoltaic (PV) cells, which convert sunlight into direct current (DC) electricity.
Inverter
An inverter converts the DC electricity generated by solar panels into the alternating current (AC) electricity used in most homes
Monitoring System A monitoring system sends information about your solar installation, including the amount of energy it generates, to your solar provider, to maintain optimal system performance.
Utility grid
Your solar system is still connected to a local utility grid to provide power after sundown. Connection to the grid also allows your solar system to return to the grid any excess clean energy your system generates during the day. A utility meter measures your power consumption and solar output.
We explore the data as outlined in the Table Of Contents.
library(tidyverse)
library(leaflet)
library(rgdal)
library(stringr)
library(DT)
library(jsonlite)
rm(list=ls())
fillColor = "#FFA07A"
fillColor2 = "#F1C40F"
sunroof = read_csv("C:/Users/ddalv/Documents/Courses/Capstone/google-project-sunroof/sunroof_solar_potential_by_censustract.csv")
ls(sunroof)## [1] "carbon_offset_metric_tons"
## [2] "count_qualified"
## [3] "existing_installs_count"
## [4] "install_size_kw_buckets"
## [5] "kw_median"
## [6] "kw_total"
## [7] "lat_avg"
## [8] "lat_max"
## [9] "lat_min"
## [10] "lng_avg"
## [11] "lng_max"
## [12] "lng_min"
## [13] "number_of_panels_e"
## [14] "number_of_panels_f"
## [15] "number_of_panels_median"
## [16] "number_of_panels_n"
## [17] "number_of_panels_s"
## [18] "number_of_panels_total"
## [19] "number_of_panels_w"
## [20] "percent_covered"
## [21] "percent_qualified"
## [22] "region_name"
## [23] "state_name"
## [24] "yearly_sunlight_kwh_e"
## [25] "yearly_sunlight_kwh_f"
## [26] "yearly_sunlight_kwh_kw_threshold_avg"
## [27] "yearly_sunlight_kwh_median"
## [28] "yearly_sunlight_kwh_n"
## [29] "yearly_sunlight_kwh_s"
## [30] "yearly_sunlight_kwh_total"
## [31] "yearly_sunlight_kwh_w"
In this table below, only the Lowest Twenty Percentage Values are shown.
GetPercentageOfNonNullValues <- function(OurDataSet) {
PercentageOfNonNullValues = ( colSums(!is.na(OurDataSet)) / nrow(OurDataSet) ) * 100
PercentageOfNonNullValues = as.data.frame(PercentageOfNonNullValues)
PercentageOfNonNullValues = rownames_to_column(PercentageOfNonNullValues,"VariableName")
PercentageOfNonNullValues = PercentageOfNonNullValues %>% arrange(desc(PercentageOfNonNullValues))
PercentageOfNonNullValues = PercentageOfNonNullValues %>%
mutate(VariableName = reorder(VariableName,PercentageOfNonNullValues)) %>% tail(20)
datatable(PercentageOfNonNullValues, style="bootstrap", class="table-condensed", options = list(dom = 'tp',scrollX = TRUE))
}
GetPercentageOfNonNullValues(sunroof)We observe all the variables have more than 99% values. Looking forward for an interesting analysis.
We find the states which has regions with Hundred percentage Qualification for Solar Roof Top. In the below bar plot, we show the states which has the Maximum number of such regions.
sunroof$percent_qualified = as.numeric(sunroof$percent_qualified)
sunroof %>%
filter(percent_qualified == 100) %>%
group_by(state_name) %>%
tally() %>%
mutate(state_name = reorder(state_name,n)) %>%
arrange(desc(n)) %>%
head(20) %>%
ggplot(aes(x = state_name,y = n)) +
geom_bar(stat='identity',colour="white", fill =fillColor2) +
geom_text(aes(x = state_name, y = 1, label = paste0("(",n,")",sep="")),
hjust=0, vjust=.5, size = 4, colour = 'black',
fontface = 'bold') +
labs(x = 'State', y = 'Count Hundred Percent Qualified',
title = 'States with Regions Hundred Percent Qualified for Solar') +
coord_flip() +
theme_bw()This choropleth shows the No of Regions in each state which is 100 Percentage Qualified for Solar Roof Top.
StateCoverageRegions = sunroof %>%
filter(percent_qualified == 100) %>%
group_by(state_name) %>%
tally() %>%
mutate(state_name = reorder(state_name,n))
states <- readOGR(dsn = "C:/Users/ddalv/Documents/Courses/Capstone/us-states-cartographic-boundary-shapefiles/cb_2016_us_state_500k.shp")## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\ddalv\Documents\Courses\Capstone\us-states-cartographic-boundary-shapefiles\cb_2016_us_state_500k.shp", layer: "cb_2016_us_state_500k"
## with 56 features
## It has 9 fields
## Integer64 fields read as strings: ALAND AWATER
statesOriginal = states
StateCoverageRegions$state_name = as.character(StateCoverageRegions$state_name)
states@data = left_join(states@data, StateCoverageRegions, by = c("NAME" = "state_name"))
bins = c(0,5, 10, 15, 20, 25,35, 45)
pal = colorBin("YlOrRd", domain = states@data$n, bins = bins)
labels = sprintf(
"<strong>%s</strong><br/>%g",
states@data$NAME, states@data$n
) %>% lapply(htmltools::HTML)
leaflet(data = states) %>% setView(-96, 37.8, 4) %>%
addPolygons(
fillColor = ~pal(n),
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
fillOpacity = 0.7,
highlight = highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE),
label = labels,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "15px",
direction = "auto")) %>%
addLegend(pal = pal, values = ~n, opacity = 0.7, title = "#Post Codes 100 %age Qualified for Solar Roof Top",
position = "bottomleft")We observe that the States of New York , California , Texas , Pennsylvania , Illinois have very High Number of Post Codes with Hundred Percent Qualification for Solar.
We observe that New York has the Highest number of regions with Hundred Percent Qualifications for Solar. Therefore we now go deeper and examine the Counties in New York and show in a bar plot the counties and their Percentage Qualification for Solar.
CountyData = read_csv("C:/Users/ddalv/Documents/Courses/Capstone/google-project-sunroof/project-sunroof-county-09082017.csv")
NYCountyData = CountyData %>%
filter(state_name == "New York") %>%
arrange(desc(percent_qualified)) %>%
mutate(region_name = reorder(region_name,percent_qualified))
NYCountyData %>%
head(20) %>%
ggplot(aes(x = region_name,y = percent_qualified)) +
geom_bar(stat='identity',colour="white", fill =fillColor) +
geom_text(aes(x = region_name, y = 1, label = paste0("(",round(percent_qualified),"%",")",sep="")),
hjust=0, vjust=.5, size = 4, colour = 'black',
fontface = 'bold') +
labs(x = 'County Name', y = 'Percent Qualified for Solar',
title = 'Counties in NY With Percent Qualified for Solar') +
coord_flip() +
theme_bw()If you are not familiar with the Carbon Offset, I have the following information from Wikipedia.
A carbon offset is a reduction in emissions of carbon dioxide or greenhouse gases made in order to compensate for or to offset an emission made elsewhere.
Carbon offsets are measured in metric tons of carbon dioxide-equivalent (CO2e) and may represent six primary categories of greenhouse gases:[5] carbon dioxide (CO2), methane (CH4), nitrous oxide (N2O), perfluorocarbons (PFCs), hydrofluorocarbons (HFCs), and sulfur hexafluoride (SF6).[6] One carbon offset represents the reduction of one metric ton of carbon dioxide or its equivalent in other greenhouse gases.
CarbonOffsetState = sunroof %>%
group_by(state_name) %>%
summarise(CarbonOffset = sum(carbon_offset_metric_tons)) %>%
arrange(desc(CarbonOffset)) %>%
mutate(state_name = reorder(state_name,CarbonOffset))
CarbonOffsetState %>%
head(20)%>%
ggplot(aes(x = state_name,y = CarbonOffset)) +
geom_bar(stat='identity',colour="white", fill =fillColor2) +
geom_text(aes(x = state_name, y = 1, label = paste0("(",round(CarbonOffset/1000000)," M ",")",sep="")),
hjust=0, vjust=.5, size = 4, colour = 'black',
fontface = 'bold') +
labs(x = 'State', y = 'Carbon Offset',
title = 'States and Carbon Offset in Million Metric Tons') +
coord_flip() +
theme_bw()We examine the states for Carbon Offset Potential through a choropleth.
CarbonOffsetState = CarbonOffsetState %>%
mutate(CarbonOffsetInMillions = CarbonOffset/1000000)
states = statesOriginal
states@data = left_join(states@data, CarbonOffsetState, by = c("NAME" = "state_name"))
bins <- c(0,7, 15, 20, 30,40,50, 55,60)
pal <- colorBin("YlOrRd", domain = states@data$CarbonOffsetInMillions, bins = bins)
labels <- sprintf(
"<strong>%s</strong><br/>%g",
states@data$NAME, states@data$CarbonOffsetInMillions
) %>% lapply(htmltools::HTML)
leaflet(data = states) %>% setView(-96, 37.8, 4) %>%
addPolygons(
fillColor = ~pal(CarbonOffsetInMillions),
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
fillOpacity = 0.7,
highlight = highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE),
label = labels,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "15px",
direction = "auto")) %>%
addLegend(pal = pal, values = ~CarbonOffsetInMillions, opacity = 0.7, title = "Carbon Offset Potential in Millions Metric Tons",
position = "bottomleft")We observe that Texas , Florida and California have very high potential for Carbon Offset.
We show the Choropleth for the Texas Counties with the Carbon Offset Data
USCounties <- readOGR(dsn = "C:/Users/ddalv/Documents/Courses/Capstone/us-states-cartographic-boundary-shapefiles/cb_2016_48_cousub_500k.shp")## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\ddalv\Documents\Courses\Capstone\us-states-cartographic-boundary-shapefiles\cb_2016_48_cousub_500k.shp", layer: "cb_2016_48_cousub_500k"
## with 862 features
## It has 10 fields
## Integer64 fields read as strings: ALAND AWATER
CountyData = read_csv("C:/Users/ddalv/Documents/Courses/Capstone/google-project-sunroof/project-sunroof-county-09082017.csv")
TexasCountyData = CountyData %>%
filter(state_name == "Texas") %>%
group_by(region_name) %>%
summarise(CarbonOffset = sum(carbon_offset_metric_tons)/1000) %>%
arrange(desc(CarbonOffset)) %>%
mutate(region_name = reorder(region_name,CarbonOffset))
TexasCounties2 = sapply(str_split(TexasCountyData$region_name," "), head, 1)
TexasCountyData$region_name = TexasCounties2
USCounties@data$NAME = as.character(USCounties@data$NAME)
USCounties@data = inner_join(USCounties@data, TexasCountyData, by = c("NAME" = "region_name"))
bins <- c(0,500,1000, 2000, 3000, 5000, 6000,9000, 13000)
pal <- colorBin("YlOrRd", domain = USCounties@data$CarbonOffset, bins = bins)
labels <- sprintf(
"<strong>%s</strong><br/>%g",
USCounties@data$NAME, USCounties@data$CarbonOffset
) %>% lapply(htmltools::HTML)
leaflet(data = USCounties) %>% setView(-98.613281, 31.203405, 6) %>%
addPolygons(
fillColor = ~pal(CarbonOffset),
weight = 2,
opacity = 1,
color = "white",
dashArray = "3",
fillOpacity = 0.7,
highlight = highlightOptions(
weight = 5,
color = "#666",
dashArray = "",
fillOpacity = 0.7,
bringToFront = TRUE),
label = labels,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "15px",
direction = "auto")) %>%
addLegend(pal = pal, values = ~CarbonOffset, opacity = 0.7, title = "Carbon Offset in '000 metric Tons",
position = "bottomright")CityData = read_csv("C:/Users/ddalv/Documents/Courses/Capstone/google-project-sunroof/project-sunroof-city-09082017.csv")
HoustonCity = CityData %>%
filter(region_name == "Houston") %>%
filter(state_name == "Texas")
HoustonBuckets = jsonlite::fromJSON(HoustonCity$install_size_kw_buckets_json, simplifyDataFrame = TRUE)
Buckets = data.frame(BucketName = character(), BucketKW = numeric())
for( counter in 1:10)
{
BucketName = paste0(HoustonBuckets[counter,1],"-",HoustonBuckets[counter+1,1])
BucketRow = data.frame(BucketName = BucketName, BucketKW = HoustonBuckets[counter,2])
Buckets = rbind(Buckets,BucketRow)
}
Buckets %>%
ggplot(aes(x = BucketName,y = BucketKW)) +
geom_bar(stat='identity',colour="white", fill =fillColor2) +
labs(x = 'Installation Size in kW', y = 'Count of Buildings',
title = 'Rooftop solar capacity distribution (number of roofs, < 50kW)') +
theme_bw()Buckets = data.frame(BucketName = character(), BucketKW = numeric())
BucketSize = 50
counter = 11
while( BucketSize <= 1000)
{
BucketName = paste0(HoustonBuckets[counter,1],"-",HoustonBuckets[counter+10,1])
BucketKW = 0
counter2 = counter +10
while( counter < counter2)
{
BucketKW = BucketKW + as.numeric(HoustonBuckets[counter,2])
counter = counter + 1
}
BucketRow = data.frame(BucketName = BucketName, BucketKW = BucketKW)
if( BucketSize < 1000)
{
BucketSize = HoustonBuckets[counter+10,1]
}
else
{
BucketSize = 1001
}
Buckets = rbind(Buckets,BucketRow)
}
Buckets %>%
ggplot(aes(x = BucketName,y = BucketKW)) +
geom_bar(stat='identity',colour="white", fill =fillColor2) +
labs(x = 'Installation Size in kW', y = 'Count of Buildings',
title = 'Rooftop solar capacity distribution (number of roofs, 50kW-1MW)') +
theme_bw() + theme(axis.text.x = element_text(angle=90, vjust=0.5, size=10))Description of the different roof faces and the total solar energy generation is shown below
| Type | Description |
|---|---|
| yearly_sunlight_kwh_f | total solar energy generation potential for flat roof space in that region |
| yearly_sunlight_kwh_n | total solar energy generation potential for north roof space in that region |
| yearly_sunlight_kwh_e | total solar energy generation potential for east roof space in that region |
| yearly_sunlight_kwh_s | total solar energy generation potential for south roof space in that region |
| yearly_sunlight_kwh_w | total solar energy generation potential for west roof space in that region |
HoustonCity2 = gather(HoustonCity, key = Type, value = measurement,yearly_sunlight_kwh_f,
yearly_sunlight_kwh_n,
yearly_sunlight_kwh_s, yearly_sunlight_kwh_e, yearly_sunlight_kwh_w)
HoustonCity2 = HoustonCity2 %>%
select(Type,measurement) %>%
mutate( Type = reorder(Type,measurement))
HoustonCity2 %>%
ggplot(aes(x = Type,y = measurement)) +
geom_bar(stat='identity',colour="white", fill =fillColor2) +
labs(x = 'Roof Face Type', y = 'Measurement in kwh',
title = 'Total solar energy generation potential in different Directions') +
coord_flip() +
theme_bw()Description of the different roof faces and the total solar energy generation is shown below
| Type | Description |
|---|---|
| number_of_panels_f | # of solar panels potential for flat roof space in that region, assuming 1.650m x 0.992m panels |
| number_of_panels_n | # of solar panels potential for north roof space in that region, assuming 1.650m x 0.992m panels |
| number_of_panels_e | # of solar panels potential for east roof space in that region, assuming 1.650m x 0.992m panels |
| number_of_panels_s | # of solar panels potential for south roof space in that region, assuming 1.650m x 0.992m panels |
| number_of_panels_w | # of solar panels potential for west roof space in that region, assuming 1.650m x 0.992m panels |
HoustonCity2 = gather(HoustonCity, key = Type, value = measurement,
number_of_panels_f,number_of_panels_n,
number_of_panels_s, number_of_panels_e,
number_of_panels_w)
HoustonCity2 = HoustonCity2 %>%
select(Type,measurement) %>%
mutate( Type = reorder(Type,measurement))
HoustonCity2 %>%
ggplot(aes(x = Type,y = measurement)) +
geom_bar(stat='identity',colour="white", fill =fillColor) +
labs(x = 'Direction of Panels', y = 'Number of Potential Panels',
title = 'Number of Potential Panels in different Directions') +
coord_flip() +
theme_bw()We examine the Carbon Offset Potential of the Cities and plot the Top Twenty in a bar plot.
CarbonOffsetCities = CityData %>%
arrange(desc(carbon_offset_metric_tons)) %>%
head(20)
CarbonOffsetCities %>%
mutate(region_name = reorder(region_name,carbon_offset_metric_tons)) %>%
ggplot(aes(x = region_name,y = carbon_offset_metric_tons)) +
geom_bar(stat='identity',colour="white", fill =fillColor2) +
geom_text(aes(x = region_name, y = 1,
label = paste0("(",round( (carbon_offset_metric_tons/10e+3),2),")",sep="")),
hjust=0, vjust=.5, size = 4, colour = 'black',
fontface = 'bold') +
labs(x = 'Cities', y = 'Carbon Offset Potential in Thousands Metric Tons',
title = 'Cities and Carbon Offset Potential in Thousands Metric Tons') +
coord_flip() +
theme_bw()We plot the map of Top Fifty Cities which have high Carbon Offset Potential.
CarbonOffsetCities = CityData %>%
arrange(desc(carbon_offset_metric_tons)) %>%
head(50)
center_lon = median(CarbonOffsetCities$lng_avg)
center_lat = median(CarbonOffsetCities$lat_avg)
leaflet(CarbonOffsetCities) %>% addProviderTiles("Esri.NatGeoWorldMap") %>%
addCircles(lng = ~lng_avg, lat = ~lat_avg,radius = ~sqrt(carbon_offset_metric_tons)*30,
color = c("red")) %>%
# controls
setView(lng=center_lon, lat=center_lat, zoom=4)